load data

geno <- read_delim("tables/TableS1_genotypes.tsv")
pheno <- read_delim("tables/TableS2_phenotypes_metadata.tsv")
cipro_antibiogram <- full_join(geno,pheno) %>%
  mutate(SRnwt = if_else(Resistance.phenotype=="I", "NWT", Resistance.phenotype)) %>%
  mutate(SRnwt = factor(SRnwt, levels=c("S","NWT","R")))

set colours

wt_colours <- c(`non-wt (I/R)`="IndianRed", `wt (S)`= "LightBlue")
res_colours <- c("I"="grey", "R"="IndianRed", "S"="LightBlue", "NWT"="grey")

long form, one row per strain/determinant combination

cipro_res_mech <- cipro_antibiogram %>%
  mutate(Aac6_gene=if_else(aac6 == 1, "aac(6`)-Ib-cr", "-")) %>%
  pivot_longer(c(Flq_mutations,Flq_acquired, Aac6_gene), names_to = "marker.type", 
               values_to = "marker") %>%
  separate_longer_delim(cols=marker, delim=";")

Numbers reported in text - para 3

# effect of ParC when added to GyrA-83 background (absence of any genes or GyrA-87)

# MIC data
QRDR_MIC_GyrA83background_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0 & `GyrA-83`==1 & `GyrA-87`==0) %>% 
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, ParC_mutations, `ParC-80`, `ParC-84`)

wilcox.test(log2(as.numeric(Measurement)) ~ ParC_mutations, data=QRDR_MIC_GyrA83background_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by ParC_mutations
## W = 13142, p-value = 0.001123
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ ParC_mutations, data=QRDR_MIC_GyrA83background_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ ParC_mutations, 
##     data = QRDR_MIC_GyrA83background_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0946 -0.0946 -0.0946  0.2586  7.9054 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.7414     0.1394   5.320 1.43e-07 ***
## ParC_mutations   0.3532     0.1460   2.419   0.0158 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.061 on 648 degrees of freedom
## Multiple R-squared:  0.008949,   Adjusted R-squared:  0.00742 
## F-statistic: 5.852 on 1 and 648 DF,  p-value: 0.01584
summary(as.numeric(QRDR_MIC_GyrA83background_noGenes_dat$Measurement)[QRDR_MIC_GyrA83background_noGenes_dat$`ParC_mutations`==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.25    2.00    2.00    4.69    2.00  512.00
summary(as.numeric(QRDR_MIC_GyrA83background_noGenes_dat$Measurement)[QRDR_MIC_GyrA83background_noGenes_dat$`ParC_mutations`==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   1.000   1.000   2.901   4.000  16.000
# disk diffusion
QRDR_DD_GyrA83background_noGenes_dat <- cipro_antibiogram %>%
  filter(aac6==0 & acquired_genes==0 & `GyrA-83`==1 & `GyrA-87`==0) %>% 
  filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, ParC_mutations, `ParC-80`, `ParC-84`)

wilcox.test(as.numeric(Measurement) ~ ParC_mutations, data=QRDR_DD_GyrA83background_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by ParC_mutations
## W = 1112.5, p-value = 5.475e-09
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ ParC_mutations, data=QRDR_DD_GyrA83background_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ ParC_mutations, data = QRDR_DD_GyrA83background_noGenes_dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.870 -3.585 -1.727  3.202 20.415 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      18.870      1.006   18.76  < 2e-16 ***
## ParC_mutations   -9.285      1.204   -7.71 4.58e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.823 on 74 degrees of freedom
## Multiple R-squared:  0.4454, Adjusted R-squared:  0.4379 
## F-statistic: 59.44 on 1 and 74 DF,  p-value: 4.582e-11
summary(as.numeric(QRDR_DD_GyrA83background_noGenes_dat$Measurement)[QRDR_DD_GyrA83background_noGenes_dat$ParC_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.000   6.000   6.000   9.585  13.000  30.000
summary(as.numeric(QRDR_DD_GyrA83background_noGenes_dat$Measurement)[QRDR_DD_GyrA83background_noGenes_dat$ParC_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   16.00   20.00   18.87   22.00   25.00
# test categorial phenotypes

# all genomes with no acquired genes (MIC/DD)
QRDR_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

# nonWT vs presence/absence
fisher.test(table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   525.1282 1264.3917
## sample estimates:
## odds ratio 
##   826.9416
# resistance vs presence/absence
fisher.test(table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1050.256 2419.794
## sample estimates:
## odds ratio 
##   1521.631

Numbers reported in text - para 4

effect of presence/absence of any QRDR, in absence of any acquired genes

# total strains without PMQR or aac6
nrow(QRDR_noGenes_dat)
## [1] 7457
# presence of QRDR vs MIC
QRDR_MIC_noGenes_dat <- cipro_antibiogram %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    filter(aac6==0 & acquired_genes==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations, `GyrA-83`, `ParC-80`) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

wilcox.test(log2(as.numeric(Measurement)) ~ qrdr_bin, data=QRDR_MIC_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by qrdr_bin
## W = 98162, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ qrdr_bin, data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ qrdr_bin, data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1830 -0.1830 -0.0414 -0.0414  7.9586 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.95856    0.01638  -119.6   <2e-16 ***
## qrdr_bin     3.14153    0.02793   112.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9207 on 4814 degrees of freedom
## Multiple R-squared:  0.7243, Adjusted R-squared:  0.7243 
## F-statistic: 1.265e+04 on 1 and 4814 DF,  p-value: < 2.2e-16
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$qrdr_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   2.000   2.000   5.536   2.000 512.000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$qrdr_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3421  0.2500 64.0000
# presence of QRDR vs DD zone
QRDR_DD_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

wilcox.test(as.numeric(Measurement) ~ qrdr_bin, data=QRDR_DD_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by qrdr_bin
## W = 320646, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ qrdr_bin, data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ qrdr_bin, data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  18.1154 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  28.66587    0.06349  451.51   <2e-16 ***
## qrdr_bin    -16.78125    0.28616  -58.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.181 on 2639 degrees of freedom
## Multiple R-squared:  0.5658, Adjusted R-squared:  0.5656 
## F-statistic:  3439 on 1 and 2639 DF,  p-value: < 2.2e-16
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$qrdr_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00    6.00    9.00   11.88   17.00   30.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$qrdr_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
# presence of QRDR vs nonWT
fisher.test(table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   525.1282 1264.3917
## sample estimates:
## odds ratio 
##   826.9416
# presence of QRDR vs R
fisher.test(table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1050.256 2419.794
## sample estimates:
## odds ratio 
##   1521.631

effect of number of QRDR mutations, in absence of any acquired genes

# MIC vs QRDR count
QRDR_MIC_noGenes_dat <- QRDR_MIC_noGenes_dat %>%
  mutate(QRDR_0_1_2 = if_else(QRDR_mutations>2, 2, QRDR_mutations)) %>%
  mutate(QRDR_0_1_2_3 = if_else(QRDR_mutations>3, 3, QRDR_mutations)) %>%
  mutate(QRDR_1_2 = if_else(QRDR_0_1_2==0, NA, QRDR_0_1_2)) %>%
  mutate(QRDR_2_3 = if_else(QRDR_0_1_2<2, NA, QRDR_0_1_2_3))

# median MICs, grouped by QRDR count
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3421  0.2500 64.0000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   1.000   1.000   2.597   2.000  16.000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   2.000   2.000   6.255   2.000 512.000
# test for difference in MIC with QRDR count
summary(lm(log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), 
##     data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2102 -0.2102 -0.0414 -0.0414  7.9586 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.95856    0.01633 -119.96   <2e-16 ***
## factor(QRDR_0_1_2)1  2.54190    0.10939   23.24   <2e-16 ***
## factor(QRDR_0_1_2)2  3.16879    0.02825  112.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9178 on 4813 degrees of freedom
## Multiple R-squared:  0.7262, Adjusted R-squared:  0.7261 
## F-statistic:  6382 on 2 and 4813 DF,  p-value: < 2.2e-16
summary(lm(log2(as.numeric(Measurement)) ~ QRDR_mutations, data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ QRDR_mutations, 
##     data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5764 -0.1041 -0.1041 -0.1041  8.5810 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.89590    0.01706  -111.1   <2e-16 ***
## QRDR_mutations  1.15743    0.01110   104.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9712 on 4814 degrees of freedom
## Multiple R-squared:  0.6933, Adjusted R-squared:  0.6932 
## F-statistic: 1.088e+04 on 1 and 4814 DF,  p-value: < 2.2e-16
# DD vs QRDR count
QRDR_DD_noGenes_dat <- QRDR_DD_noGenes_dat %>%
  mutate(QRDR_0_1_2 = if_else(QRDR_mutations>2, 2, QRDR_mutations)) %>%
  mutate(QRDR_0_1_2_3 = if_else(QRDR_mutations>3, 3, QRDR_mutations)) %>%
  mutate(QRDR_1_2 = if_else(QRDR_0_1_2==0, NA, QRDR_0_1_2)) %>%
  mutate(QRDR_2_3 = if_else(QRDR_0_1_2<2, NA, QRDR_0_1_2_3))

# median DD zones, grouped by QRDR count
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   17.00   20.50   19.75   22.25   29.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.000   6.000   6.000   6.676   6.000  12.000
# test for difference in DD zone with QRDR count
summary(lm(log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), 
##     data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24805 -0.07812  0.02497  0.07388  1.96617 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.833011   0.003863 1251.15   <2e-16 ***
## factor(QRDR_0_1_2)1 -0.564658   0.030848  -18.30   <2e-16 ***
## factor(QRDR_0_1_2)2 -1.892292   0.020766  -91.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1936 on 2638 degrees of freedom
## Multiple R-squared:  0.7645, Adjusted R-squared:  0.7644 
## F-statistic:  4283 on 2 and 2638 DF,  p-value: < 2.2e-16
summary(lm(log2(as.numeric(Measurement)) ~ QRDR_mutations, data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ QRDR_mutations, 
##     data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24831 -0.07839  0.02471  0.07362  1.57431 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.833275   0.003884 1244.46   <2e-16 ***
## QRDR_mutations -0.750349   0.008203  -91.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1953 on 2639 degrees of freedom
## Multiple R-squared:  0.7602, Adjusted R-squared:  0.7601 
## F-statistic:  8367 on 1 and 2639 DF,  p-value: < 2.2e-16

Fig1 - number of QRDR vs MIC, facet aac6 yes/no (no other genes)

#MIC distribution for # QRDR, in absence of genes
QRDR_MIC_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
    geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
    facet_wrap(vars(aac6_label)) +
    scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
    scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="MIC (mg/L)", x="", col="Phenotype", 
         title="a) No. QRDR mutations vs phenotype",
         subtitle="(in absence of PMQR genes)")

QRDR_MIC_noGenes

QRDR_pheno_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
    scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of QRDR mutations") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())

Fig S6 - number of QRDR vs DD, facet aac6 yes/no (no other genes)

# DD distribution for # QRDR, in absence of genes
QRDR_DD_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
    geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
    facet_wrap(vars(aac6_label)) +
    scale_colour_manual(values = res_colours) + 
    theme_light() + 
    labs(y="Disk zone (mm)", x="", col="Phenotype",
         title="No. QRDR mutations vs phenotype",
         subtitle="(in absence of PMQR genes)")

QRDR_DDpheno_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
    scale_fill_manual(values = res_colours) +  
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of QRDR mutations") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())
QRDR_DD_noGenes / QRDR_DDpheno_noGenes + plot_layout(heights=c(2,1))

ggsave("figs/FigS6_numQRDR_DD_pheno.pdf", width=6, height=4)
ggsave("figs/FigS6_numQRDR_DD_pheno.png", width=6, height=4)

Fig S4 panel a - upset plot of QRDR positions, absence of aac6/acquired genes, with MIC data

upset_QRDR_noAac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==0 & acquired_genes==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("ParC-84","ParC-80","GyrA-87","GyrA-83","wt_QRDR"), # counts are converted to binary
        sort_sets=FALSE,
      intersections=list(
        'wt_QRDR',
        "GyrA-83",
        "GyrA-87",
        c("GyrA-83","ParC-80"),
        c("GyrA-83","ParC-84"),
        c("GyrA-83","GyrA-87","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-84")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing WT/nonWT
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
                 scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^9)) +
              theme(legend.position = "none", axis.title=element_text(size=8))  +
              ggtitle("aac(6`)-Ib-cr absent")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_QRDR_noAac6_MIC

Fig S4 panel b - upset plot of QRDR positions, presence of aac6 but no other acquired genes, with MIC data

upset_QRDR_aac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==1 & acquired_genes==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("ParC-84","ParC-80","GyrA-87","GyrA-83","wt_QRDR"), # counts are converted to binary
        sort_sets=FALSE,
      intersections=list(
        'wt_QRDR',
        "GyrA-83",
        "GyrA-87",
        c("GyrA-83","ParC-80"),
        c("GyrA-83","ParC-84"),
        c("GyrA-83","GyrA-87","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-84")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing WT/nonWT
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
              scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^9)) +
              labs(colour = "Phenotype") +
              theme(axis.title=element_text(size=8)) +
              ggtitle("aac(6`)-Ib-cr present")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas
upset_QRDR_aac6_MIC

Fig S5 panel a - QRDR positions, absence of aac6/acquired genes, with DD data

upset_QRDR_noAac6_DD <- cipro_antibiogram %>% 
  filter(aac6==0 & acquired_genes==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("ParC-84","ParC-80","GyrA-87","GyrA-83","wt_QRDR"), # counts are converted to binary
        sort_sets=FALSE,
      intersections=list(
        'wt_QRDR',
        "GyrA-83",
        "GyrA-87",
        c("GyrA-83","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-84"),
        c("GyrA-83","GyrA-87","ParC-80","ParC-84")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("Disk diffusion data") +
              theme(axis.title=element_text(size=8), legend.position="none") + 
              ggtitle("aac(6`)-Ib-cr absent") + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_QRDR_noAac6_DD

Fig S5 panel b - QRDR positions, presence of aac6 but no other acquired genes, with DD data

upset_QRDR_aac6_DD <- cipro_antibiogram %>% 
  filter(aac6==1 & acquired_genes==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("ParC-84","ParC-80","GyrA-87","GyrA-83","wt_QRDR"), # counts are converted to binary
        sort_sets=FALSE,
      intersections=list(
        'wt_QRDR',
        "GyrA-83",
        "GyrA-87",
        c("GyrA-83","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-80"),
        c("GyrA-83","GyrA-87","ParC-84"),
        c("GyrA-83","GyrA-87","ParC-80","ParC-84")),  
        sort_intersections=FALSE,      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_color_manual(values = c("I"="grey", "R"="IndianRed", "S"="LightBlue")) + 
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("aac(6`)-Ib-cr present") +
              theme(axis.title=element_text(size=8)) + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_QRDR_aac6_DD

Supp Figs 4 and 5: QRDR sites vs pheno (MIC & DD, with/without aac6)

upset_QRDR_noAac6_MIC | upset_QRDR_aac6_MIC

ggsave("figs/FigS4_QRDR_MIC.pdf", width=9, height=6)
ggsave("figs/FigS4_QRDR_MIC.png", width=9, height=6)


upset_QRDR_noAac6_DD | upset_QRDR_aac6_DD

ggsave("figs/FigS5_QRDR_DD.pdf", width=9, height=6)
ggsave("figs/FigS5_QRDR_DD.png", width=9, height=6)

set up binary variables for common PMQR genes

# genes in at least 10 genomes
qnr_genes <- cipro_res_mech %>% 
  mutate(marker=gsub("[\\^\\*\\?]", "", marker)) %>%
  mutate(marker=gsub(".v1", "", marker)) %>%
  mutate(marker=gsub(".v2", "", marker)) %>%
  group_by(marker) %>% 
  count() %>% filter(startsWith(marker, "q")) %>% 
  filter(n>10) %>% pull(marker)

qnr_presence <- cipro_res_mech %>% 
  mutate(marker=gsub("[\\^\\*\\?]", "", marker)) %>%
  mutate(marker=gsub(".v1", "", marker)) %>%
  mutate(marker=gsub(".v2", "", marker)) %>%
  filter(marker %in% qnr_genes) %>%
  group_by(strain, marker) %>% count() %>% pivot_wider(names_from=marker, values_from=n)

cipro_antibiogram <- left_join(cipro_antibiogram, qnr_presence)

Fig S7 panel a - upset plot of qnr genes, absence of aac6 and QRDR, with MIC data

upset_qnr_noAac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==0 & QRDR_mutations==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=rev(qnr_genes), 
      sort_sets=FALSE,
        intersections=list(
        'Outside of known sets',
        "qnrA1",
        "qnrB17",
        "qnrB19",
        "qnrB4",
        "qnrB6",
        "qnrS1",
        c("qnrB4","qnrS1"),
        c("qnrB6","qnrS1"),
        c("qnrB6","qnrB19")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
              scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              theme(legend.position = "none", axis.title=element_text(size=8))  +
              ggtitle("aac(6`)-Ib-cr absent")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas
upset_qnr_noAac6_MIC

Fig S7 panel- upset plot of qnr genes, presence of aac6 but no QRDR, with MIC data

upset_qnr_aac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==1 & QRDR_mutations==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=rev(qnr_genes), 
      sort_sets=FALSE,
        intersections=list(
        'Outside of known sets',
        "qnrA1",
        "qnrB17",
        "qnrB19",
        "qnrB4",
        "qnrB6",
        "qnrS1",
        c("qnrB4","qnrS1"),
        c("qnrB6","qnrS1"),
        c("qnrB6","qnrB19")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              labs(colour = "Phenotype") +
              theme(axis.title=element_text(size=8)) +
              ggtitle("aac(6`)-Ib-cr present")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas
upset_qnr_aac6_MIC

Fig S8 panel - qnr genes, absence of aac6 and QRDR mutations, with DD data

upset_qnr_noAac6_DD <- cipro_antibiogram %>% 
  filter(aac6==0 & QRDR_mutations==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=rev(qnr_genes), 
      sort_sets=FALSE,
        intersections=list(
        'Outside of known sets',
        "qnrA1",
        "qnrB17",
        "qnrB19",
        "qnrB4",
        "qnrB6",
        "qnrS1",
        c("qnrB4","qnrS1"),
        c("qnrB6","qnrS1"),
        c("qnrB6","qnrB19")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("Disk diffusion data") +
              theme(axis.title=element_text(size=8), legend.position="none") + 
              ggtitle("aac(6`)-Ib-cr absent") + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_qnr_noAac6_DD

Fig S8 panel - qnr genes, presence of aac6 but no QRDR mutations, with DD data

upset_qnr_aac6_DD <- cipro_antibiogram %>% 
  filter(aac6==1 & QRDR_mutations==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=rev(qnr_genes), 
      sort_sets=FALSE,
        intersections=list(
        'Outside of known sets',
        "qnrA1",
        "qnrB17",
        "qnrB19",
        "qnrB4",
        "qnrB6",
        "qnrS1",
        c("qnrB4","qnrS1"),
        c("qnrB6","qnrS1"),
        c("qnrB6","qnrB19")),  
        sort_intersections=FALSE,
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("aac(6`)-Ib-cr present") +
              theme(axis.title=element_text(size=8)) + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_qnr_aac6_DD

Supp Figs S7 and S8: qnr genes vs pheno (MIC & DD, with/without aac6)

upset_qnr_noAac6_MIC | upset_qnr_aac6_MIC

ggsave("figs/FigS7_qnr_MIC.pdf", width=12, height=6)
ggsave("figs/FigS7_qnr_MIC.png", width=12, height=6)


upset_qnr_noAac6_DD | upset_qnr_aac6_DD

ggsave("figs/FigS8_qnr_DD.pdf", width=12, height=6)
ggsave("figs/FigS8_qnr_DD.png", width=12, height=6)

numbers for text - paragraph 5

# effect of qnr/qep genes, in absence of QRDR and aac6

# MIC data in absence of QRDR and aac6
qnr_MIC_nullBG_dat <- cipro_antibiogram %>%
  filter(Laboratory.Typing.Method !="Disk diffusion") %>%
  filter(aac6==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

# MIC vs presence/absence of qnr, in absence of QRDR and aac6
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   0.500   1.000   1.575   2.000   8.000
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3421  0.2500 64.0000
wilcox.test(log2(as.numeric(Measurement)) ~ acquired_bin, data=qnr_MIC_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by acquired_bin
## W = 140287, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ acquired_bin, data=qnr_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ acquired_bin, data = qnr_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1845 -0.0414 -0.0414 -0.0414  7.9586 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.95856    0.01615 -121.24   <2e-16 ***
## acquired_bin  2.14311    0.03952   54.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9081 on 3792 degrees of freedom
## Multiple R-squared:  0.4368, Adjusted R-squared:  0.4367 
## F-statistic:  2941 on 1 and 3792 DF,  p-value: < 2.2e-16
# DD data in absence of QRDR and aac6
qnr_DD_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

# DD vs presence/absence of qnr, in absence of QRDR and aac6

summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   17.00   21.00   19.51   22.00   31.00
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
wilcox.test(as.numeric(Measurement) ~ acquired_bin, data=qnr_DD_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by acquired_bin
## W = 230345, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ acquired_bin, data=qnr_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ acquired_bin, data = qnr_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  15.3341 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  28.66587    0.05979  479.41   <2e-16 ***
## acquired_bin -9.15545    0.31160  -29.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.996 on 2605 degrees of freedom
## Multiple R-squared:  0.2489, Adjusted R-squared:  0.2486 
## F-statistic: 863.3 on 1 and 2605 DF,  p-value: < 2.2e-16
# all genomes with no acquired genes (MIC/DD)
qnr_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

dim(qnr_noGenes_dat)
## [1] 6401    6
# NWT vs presence/absence of qnr, in absence of QRDR and aac6
fisher.test(table(qnr_noGenes_dat$nonWT_binary, qnr_noGenes_dat$acquired_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(qnr_noGenes_dat$nonWT_binary, qnr_noGenes_dat$acquired_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   86.53681 155.63478
## sample estimates:
## odds ratio 
##   115.5569
# resistance vs presence/absence of qnr, in absence of QRDR and aac6
fisher.test(table(qnr_noGenes_dat$resistant, qnr_noGenes_dat$acquired_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(qnr_noGenes_dat$resistant, qnr_noGenes_dat$acquired_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   74.21361 119.59424
## sample estimates:
## odds ratio 
##   93.70919
# MIC vs qnr count
summary(lm(log2(as.numeric(Measurement)) ~ acquired_genes, data=qnr_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ acquired_genes, 
##     data = qnr_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1339 -0.0479 -0.0479 -0.0479  7.9521 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.95206    0.01614 -120.96   <2e-16 ***
## acquired_genes  2.04298    0.03781   54.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9096 on 3792 degrees of freedom
## Multiple R-squared:  0.435,  Adjusted R-squared:  0.4348 
## F-statistic:  2919 on 1 and 3792 DF,  p-value: < 2.2e-16
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   0.500   1.000   1.546   2.000   8.000
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes==2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.5     1.0     2.0     2.5     4.0     4.0
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 
# DD vs qnr count
summary(lm(as.numeric(Measurement) ~ acquired_genes, data=qnr_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ acquired_genes, data = qnr_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6642  -1.6642   0.3358   1.3358  15.3358 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    28.66417    0.05939  482.65   <2e-16 ***
## acquired_genes -8.65839    0.28782  -30.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.978 on 2605 degrees of freedom
## Multiple R-squared:  0.2578, Adjusted R-squared:  0.2575 
## F-statistic:   905 on 1 and 2605 DF,  p-value: < 2.2e-16
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   18.00   21.00   19.91   22.50   31.00
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes==2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0     6.0    10.0    12.2    17.0    22.0
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 

Figure 2 - number of qnr/qep vs MIC, facet aac6 yes/no (no QRDR)

# MIC distribution for # QRDR, in absence of genes
qnr_MIC_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
    geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
    facet_wrap(vars(aac6_label)) +
    scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
    scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="MIC (mg/L)", x="", col="Phenotype", 
         title="b) No. PMQR genes vs phenotype",
         subtitle="(in absence of QRDR mutations)")


qnr_pheno_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
       scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of PMQR genes") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())

Fig 2

QRDR_MIC_noGenes / QRDR_pheno_noGenes / qnr_MIC_noGenes / qnr_pheno_noGenes + plot_layout(heights=c(3,1, 3, 1))

ggsave("figs/Fig2_numQnr_numPMQR_MIC_pheno.pdf", width=6, height=10)
ggsave("figs/Fig2_numQnr_numPMQR_MIC_pheno.png", width=6, height=10)

Fig S9 - number of qnr vs DD, facet aac6 yes/no (no QRDR)

# DD distribution for # qnr, in absence of genes
qnr_DD_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
    geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
    facet_wrap(vars(aac6_label)) +
      scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="Disk zone (mm)", x="", col="Phenotype",
         title="No. PMQR genes vs phenotype",
         subtitle="(in absence of QRDR mutations)")


qnr_DDpheno_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6`)-Ib-cr absent", "aac(6`)-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
      scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of PMQR genes") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())
qnr_DD_noGenes / qnr_DDpheno_noGenes + plot_layout(heights=c(2,1))

ggsave("figs/FigS9_numQnr_DD_pheno.pdf", width=6, height=4)
ggsave("figs/FigS9_numQnr_DD_pheno.png", width=6, height=4)

Fig3 panel b - aac6, QRDR mutations, qnr with DD data

upset_aac6_simple_DD <- cipro_antibiogram %>% 
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("aac6","acquired_genes", "QRDR_mutations"),# counts are converted to binary
      sort_sets=FALSE,
      set_sizes=(
        upset_set_size()
        + theme(axis.text.x=element_text(angle=45))),
      intersections=list(
        'Outside of known sets',
        "aac6",
        "acquired_genes",
        'QRDR_mutations',
        c("aac6","acquired_genes"),
        c("aac6","QRDR_mutations"),
        c("acquired_genes","QRDR_mutations"),
        c("aac6","acquired_genes","QRDR_mutations")),
            sort_intersections=FALSE,
        labeller=ggplot2::as_labeller(c(
        'QRDR_mutations'='QRDR mutations',
        'acquired_genes'='PMQR genes', 
        'aac6'="aac(6`)-Ib-cr")),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
              scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("b) Disk diffusion data") +
              ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_aac6_simple_DD

Fig3 panel a - aac6, QRDR mutations, qnr with MIC data

upset_aac6_simple_MIC <- cipro_antibiogram %>% 
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("aac6","acquired_genes", "QRDR_mutations"),# counts are converted to binary
      sort_sets=FALSE,
      set_sizes=(
        upset_set_size()
        + theme(axis.text.x=element_text(angle=45))),
      intersections=list(
        'Outside of known sets',
        "aac6",
        "acquired_genes",
        'QRDR_mutations',
        c("aac6","acquired_genes"),
        c("aac6","QRDR_mutations"),
        c("acquired_genes","QRDR_mutations"),
        c("aac6","acquired_genes","QRDR_mutations")),
        labeller=ggplot2::as_labeller(c(
        'QRDR_mutations'='QRDR mutations',
        'acquired_genes'='PMQR genes', 
        'aac6'="aac(6`)-Ib-cr")),   
      sort_intersections=FALSE,
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours) ,
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 1, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 2, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              labs(colour = "Phenotype") +
              theme(legend.position="none") + 
              ggtitle("a) MIC data")
        )

      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_aac6_simple_MIC

Fig3: aac6, QRDR mutations, qnr with MIC & DD data

upset_aac6_simple_MIC | upset_aac6_simple_DD

ggsave("figs/Fig3_simple_comb_MIC_DD.pdf", width=12, height=6)
ggsave("figs/Fig3_simple_comb_MIC_DD.png", width=12, height=6)

numbers for text - paragraph 6, effect of aac6 gene, in absence of QRDR and other acquired

# MIC
aac_MIC_nullBG_dat <- cipro_antibiogram %>%
  filter(Laboratory.Typing.Method !="Disk diffusion") %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

# MIC vs presence/absence of qnr
wilcox.test(log2(as.numeric(Measurement)) ~ aac6, data=aac_MIC_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by aac6
## W = 175387, p-value = 7.223e-14
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ aac6, data=aac_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ aac6, data = aac_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1003 -0.0414 -0.0414 -0.0414  7.9586 
## 
## Coefficients:
##             Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.95856    0.01528 -128.145   <2e-16 ***
## aac6         0.62220    0.07203    8.639   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8592 on 3307 degrees of freedom
## Multiple R-squared:  0.02207,    Adjusted R-squared:  0.02177 
## F-statistic: 74.62 on 1 and 3307 DF,  p-value: < 2.2e-16
summary(as.numeric(aac_MIC_nullBG_dat$Measurement)[aac_MIC_nullBG_dat$aac6==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1200  0.2500  0.2500  0.6249  0.5000  4.0000
summary(as.numeric(aac_MIC_nullBG_dat$Measurement)[aac_MIC_nullBG_dat$aac6==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3421  0.2500 64.0000
aac_MIC_nullBG_dat %>% group_by(aac6) %>% summarise(S=mean(Resistance.phenotype=="S"), I=mean(Resistance.phenotype=="I"), R=mean(Resistance.phenotype=="R"))
# DD
aac_DD_nullBG_dat <- cipro_antibiogram %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

wilcox.test(as.numeric(Measurement) ~ aac6, data=aac_DD_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by aac6
## W = 25156, p-value = 5.356e-05
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ aac6, data=aac_DD_nullBG_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ aac6, data = aac_DD_nullBG_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  15.3341 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.66587    0.05766 497.182  < 2e-16 ***
## aac6        -5.08254    0.83602  -6.079 1.39e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.889 on 2521 degrees of freedom
## Multiple R-squared:  0.01445,    Adjusted R-squared:  0.01406 
## F-statistic: 36.96 on 1 and 2521 DF,  p-value: 1.389e-09
summary(as.numeric(aac_DD_nullBG_dat$Measurement)[aac_DD_nullBG_dat$aac6==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   21.00   23.50   23.58   27.25   29.00
summary(as.numeric(aac_DD_nullBG_dat$Measurement)[aac_DD_nullBG_dat$aac6==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
aac_DD_nullBG_dat %>% group_by(aac6) %>% summarise(S=mean(Resistance.phenotype=="S"), I=mean(Resistance.phenotype=="I"), R=mean(Resistance.phenotype=="R"))
# all genomes with no acquired genes (MIC/DD)
qnr_noGenes_dat <- cipro_antibiogram %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

dim(qnr_noGenes_dat)
## [1] 5832    5